# Load exploration ----
# Exploratory scatter plots of BMXWAIST against BMXHT, RIDAGEYR and BMXWT,
# one plot per predictor, coloured by RIAGENDR. Points are drawn at 10%
# opacity so dense regions stay readable.
# Built with ggplot() + geom_point() because qplot() is deprecated in current
# ggplot2 releases; the mappings and the fixed alpha are the same as before.
ggplot(data, aes(x = BMXHT, y = BMXWAIST, colour = RIAGENDR)) +
  geom_point(alpha = 0.1)

ggplot(data, aes(x = RIDAGEYR, y = BMXWAIST, colour = RIAGENDR)) +
  geom_point(alpha = 0.1)

ggplot(data, aes(x = BMXWT, y = BMXWAIST, colour = RIAGENDR)) +
  geom_point(alpha = 0.1)

# Regression models ----
# Test-set predictors: test_data with the response column BMXWAIST dropped.
test_df <- select(test_data, -BMXWAIST)
# Fit a linear model, print its summary, and plot its test-set predictions.
#
# Arguments:
#   formula_str    Model formula as a character string,
#                  e.g. "BMXWAIST ~ BMXWT".
#   train_data_set Data frame the model is fitted on.
#   test_data_set  Data frame the model is evaluated on. It must contain the
#                  model's predictors plus BMXWT and RIAGENDR (used for the
#                  plot). If it has no BMXWAIST column -- as is the case for
#                  the default `test_df` -- the observed values are taken
#                  from the global `test_data`, which must then have the same
#                  number of rows.
#
# Returns a ggplot object: observed waist circumference against weight in
# black, predictions overlaid and coloured by sex, with the test MSE in the
# title. When called at top level the plot is auto-printed.
run_model <- function(formula_str, train_data_set = train_data,
                      test_data_set = test_df) {
  # Fit the regression. The call text is kept exactly like this because
  # summary() echoes it in its "Call:" section.
  lm_reg <- lm(formula = as.formula(formula_str), data = train_data_set)
  print(summary(lm_reg))

  # Predict on the data set that was actually passed in (previously the global
  # `test_df` was used regardless of `test_data_set`). Standard errors were
  # requested before but never used, so only the fitted values are computed.
  fit <- predict(lm_reg, newdata = test_data_set)

  # Observed response: prefer the evaluation set itself, fall back to the
  # global `test_data` when the response column has been stripped from it.
  if ("BMXWAIST" %in% names(test_data_set)) {
    label <- test_data_set[["BMXWAIST"]]
  } else {
    if (nrow(test_data) != nrow(test_data_set)) {
      stop(
        "`test_data_set` has no BMXWAIST column and its row count differs ",
        "from `test_data`; cannot look up the observed values.",
        call. = FALSE
      )
    }
    label <- test_data[["BMXWAIST"]]
  }

  # Collect predictions and the columns needed for plotting.
  pred_df <- data.frame(
    fit = fit,
    weight = test_data_set[["BMXWT"]],
    sex = test_data_set[["RIAGENDR"]],
    label = label
  )

  # Test error; mean_square_error() is a helper defined elsewhere, called
  # with (predictions, observed).
  mse <- mean_square_error(pred_df$fit, pred_df$label)

  # Observed values in black, predictions on top coloured by sex. `alpha` is a
  # fixed setting, so it belongs outside aes(); inside aes() it was treated as
  # a mapped variable and produced a spurious "alpha" legend.
  g <- ggplot(pred_df, aes(x = weight, y = label)) +
    geom_point(colour = "black", alpha = 0.1) +
    geom_point(aes(y = fit, colour = sex), alpha = 0.1, size = 1.5) +
    ylab("waist circumference")
  g + ggtitle(paste("MSE = ", mse))
}
# Regression models: weight ----
# Baseline: waist circumference regressed linearly on weight (BMXWT) alone.
run_model("BMXWAIST ~ BMXWT")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.3723 -4.8747 -0.1382 4.7755 23.5811
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43.865314 0.412785 106.3 <2e-16 ***
## BMXWT 0.682412 0.004917 138.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.222 on 4998 degrees of freedom
## Multiple R-squared: 0.794, Adjusted R-squared: 0.794
## F-statistic: 1.926e+04 on 1 and 4998 DF, p-value: < 2.2e-16

# Weight as the only predictor, entered through bs() with default settings
# (three basis coefficients in the summary below).
run_model("BMXWAIST ~ bs(BMXWT)")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.2327 -4.7743 -0.1098 4.7283 23.0092
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 65.2516 0.6783 96.20 <2e-16 ***
## bs(BMXWT)1 49.4273 2.2759 21.72 <2e-16 ***
## bs(BMXWT)2 87.3240 2.5591 34.12 <2e-16 ***
## bs(BMXWT)3 107.2327 4.4082 24.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.16 on 4996 degrees of freedom
## Multiple R-squared: 0.7976, Adjusted R-squared: 0.7975
## F-statistic: 6563 on 3 and 4996 DF, p-value: < 2.2e-16

# Age through bs(RIDAGEYR, df = 7), plus linear weight, sex (RIAGENDR) and
# the categorical `years` term.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.0289 -3.9078 0.0723 3.8558 21.5874
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.276240 0.630800 60.679 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.292606 0.929404 -0.315 0.75290
## bs(RIDAGEYR, df = 7)2 1.475108 0.680592 2.167 0.03025 *
## bs(RIDAGEYR, df = 7)3 2.018121 0.743094 2.716 0.00663 **
## bs(RIDAGEYR, df = 7)4 5.110992 0.644329 7.932 2.64e-15 ***
## bs(RIDAGEYR, df = 7)5 8.424950 0.839573 10.035 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 11.875666 0.914794 12.982 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 10.301733 1.023286 10.067 < 2e-16 ***
## BMXWT 0.719066 0.004319 166.492 < 2e-16 ***
## RIAGENDRMale -3.995553 0.177064 -22.566 < 2e-16 ***
## years2005-2006 -0.262251 0.366004 -0.717 0.47370
## years2007-2008 -0.021495 0.348841 -0.062 0.95087
## years2009-2010 -0.460441 0.342226 -1.345 0.17855
## years2013-2014 0.124610 0.349292 0.357 0.72129
## years2015-2016 0.781043 0.353866 2.207 0.02735 *
## years2017-2018 0.673979 0.357586 1.885 0.05951 .
## years2022-2012 0.002106 0.363684 0.006 0.99538
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.032 on 4983 degrees of freedom
## Multiple R-squared: 0.8568, Adjusted R-squared: 0.8563
## F-statistic: 1863 on 16 and 4983 DF, p-value: < 2.2e-16

# Age through ns(RIDAGEYR, df = 7) instead of bs(); other terms are linear
# weight, sex and `years`.
# NOTE(review): ns() is presumably splines::ns (natural spline basis) -- confirm.
run_model("BMXWAIST ~ ns(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.0730 -3.9146 0.0451 3.8459 21.5567
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 38.13401 0.56302 67.731 < 2e-16 ***
## ns(RIDAGEYR, df = 7)1 2.14999 0.56011 3.839 0.000125 ***
## ns(RIDAGEYR, df = 7)2 2.50719 0.68061 3.684 0.000232 ***
## ns(RIDAGEYR, df = 7)3 4.99062 0.60665 8.227 2.44e-16 ***
## ns(RIDAGEYR, df = 7)4 6.73369 0.61603 10.931 < 2e-16 ***
## ns(RIDAGEYR, df = 7)5 10.17415 0.59901 16.985 < 2e-16 ***
## ns(RIDAGEYR, df = 7)6 11.29792 1.04356 10.826 < 2e-16 ***
## ns(RIDAGEYR, df = 7)7 11.02205 0.63264 17.422 < 2e-16 ***
## BMXWT 0.71904 0.00432 166.448 < 2e-16 ***
## RIAGENDRMale -3.99138 0.17704 -22.545 < 2e-16 ***
## years2005-2006 -0.23504 0.36541 -0.643 0.520112
## years2007-2008 0.02642 0.34687 0.076 0.939282
## years2009-2010 -0.40802 0.34011 -1.200 0.230321
## years2013-2014 0.17293 0.34745 0.498 0.618711
## years2015-2016 0.82748 0.35197 2.351 0.018761 *
## years2017-2018 0.71602 0.35612 2.011 0.044420 *
## years2022-2012 0.04735 0.36196 0.131 0.895928
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.032 on 4983 degrees of freedom
## Multiple R-squared: 0.8567, Adjusted R-squared: 0.8563
## F-statistic: 1862 on 16 and 4983 DF, p-value: < 2.2e-16

# bs() terms for age (df = 7) and weight (default settings), plus sex and
# linear height (BMXHT); no `years` term.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXWT) + RIAGENDR + BMXHT")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.6852 -3.1909 0.0366 3.2605 21.9850
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 129.48065 1.79228 72.244 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.52947 0.78626 -0.673 0.5007
## bs(RIDAGEYR, df = 7)2 0.61028 0.57631 1.059 0.2897
## bs(RIDAGEYR, df = 7)3 1.26110 0.62853 2.006 0.0449 *
## bs(RIDAGEYR, df = 7)4 3.82980 0.54493 7.028 2.38e-12 ***
## bs(RIDAGEYR, df = 7)5 6.49053 0.70960 9.147 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 8.88156 0.77303 11.489 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.80371 0.85725 10.270 < 2e-16 ***
## bs(BMXWT)1 65.41056 1.65931 39.420 < 2e-16 ***
## bs(BMXWT)2 96.00156 1.83515 52.313 < 2e-16 ***
## bs(BMXWT)3 126.77400 3.15875 40.134 < 2e-16 ***
## RIAGENDRMale 0.94313 0.20018 4.711 2.53e-06 ***
## BMXHT -0.45352 0.01091 -41.569 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.103 on 4987 degrees of freedom
## Multiple R-squared: 0.8974, Adjusted R-squared: 0.8971
## F-statistic: 3634 on 12 and 4987 DF, p-value: < 2.2e-16

# bs() age term (df = 7) with linear weight, sex, linear height and `years`.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + BMXHT + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.6655 -3.2828 -0.0158 3.3540 21.9432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 104.510469 1.821093 57.389 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.503574 0.817543 -0.616 0.53795
## bs(RIDAGEYR, df = 7)2 1.083962 0.598751 1.810 0.07030 .
## bs(RIDAGEYR, df = 7)3 1.543654 0.653760 2.361 0.01825 *
## bs(RIDAGEYR, df = 7)4 4.341085 0.567124 7.655 2.31e-14 ***
## bs(RIDAGEYR, df = 7)5 6.867314 0.739633 9.285 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 9.572545 0.806930 11.863 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.366009 0.901531 9.280 < 2e-16 ***
## BMXWT 0.779610 0.004117 189.382 < 2e-16 ***
## RIAGENDRMale 1.260216 0.207849 6.063 1.43e-09 ***
## BMXHT -0.433638 0.011356 -38.186 < 2e-16 ***
## years2005-2006 -0.469043 0.321990 -1.457 0.14526
## years2007-2008 -0.300321 0.306935 -0.978 0.32790
## years2009-2010 -0.940394 0.301292 -3.121 0.00181 **
## years2013-2014 -0.231794 0.307387 -0.754 0.45084
## years2015-2016 -0.048571 0.312025 -0.156 0.87630
## years2017-2018 -0.302210 0.315578 -0.958 0.33829
## years2022-2012 -0.298817 0.320001 -0.934 0.35045
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.305 on 4982 degrees of freedom
## Multiple R-squared: 0.8892, Adjusted R-squared: 0.8888
## F-statistic: 2352 on 17 and 4982 DF, p-value: < 2.2e-16

# bs() age term, sex, linear height and `years`, with weight entered as
# invNorm(BMXWT). invNorm() is defined outside this section.
# NOTE(review): presumably an inverse-normal transform of the variable -- confirm.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + invNorm(BMXWT) + RIAGENDR + BMXHT + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.5690 -3.5938 -0.1112 3.3492 28.3049
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 171.17821 2.03946 83.933 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.61330 0.86133 -0.712 0.4765
## bs(RIDAGEYR, df = 7)2 0.07857 0.63111 0.124 0.9009
## bs(RIDAGEYR, df = 7)3 0.91193 0.68897 1.324 0.1857
## bs(RIDAGEYR, df = 7)4 3.42366 0.59781 5.727 1.08e-08 ***
## bs(RIDAGEYR, df = 7)5 5.97717 0.77956 7.667 2.10e-14 ***
## bs(RIDAGEYR, df = 7)6 8.21610 0.85031 9.662 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.89862 0.94985 9.368 < 2e-16 ***
## invNorm(BMXWT) 16.31658 0.09147 178.379 < 2e-16 ***
## RIAGENDRMale 0.38862 0.21868 1.777 0.0756 .
## BMXHT -0.44783 0.01201 -37.292 < 2e-16 ***
## years2005-2006 -0.16327 0.33920 -0.481 0.6303
## years2007-2008 -0.12564 0.32335 -0.389 0.6976
## years2009-2010 -0.41993 0.31736 -1.323 0.1858
## years2013-2014 -0.01626 0.32381 -0.050 0.9600
## years2015-2016 0.30453 0.32866 0.927 0.3542
## years2017-2018 0.01491 0.33238 0.045 0.9642
## years2022-2012 -0.10749 0.33713 -0.319 0.7499
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.59 on 4982 degrees of freedom
## Multiple R-squared: 0.877, Adjusted R-squared: 0.8766
## F-statistic: 2090 on 17 and 4982 DF, p-value: < 2.2e-16

# bs() age term, linear weight, sex and `years`, with height entered as
# invNorm(BMXHT). invNorm() is defined outside this section.
# NOTE(review): presumably an inverse-normal transform of the variable -- confirm.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + BMXWT + RIAGENDR + invNorm(BMXHT) + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.4860 -3.3002 0.0119 3.3333 22.1949
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.241117 0.578546 55.728 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.432309 0.819343 -0.528 0.59778
## bs(RIDAGEYR, df = 7)2 1.141647 0.600054 1.903 0.05715 .
## bs(RIDAGEYR, df = 7)3 1.644574 0.655164 2.510 0.01210 *
## bs(RIDAGEYR, df = 7)4 4.424307 0.568311 7.785 8.42e-15 ***
## bs(RIDAGEYR, df = 7)5 6.903075 0.741236 9.313 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 9.699799 0.808505 11.997 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.407813 0.903488 9.306 < 2e-16 ***
## BMXWT 0.778097 0.004115 189.085 < 2e-16 ***
## RIAGENDRMale 1.030185 0.205016 5.025 5.21e-07 ***
## invNorm(BMXHT) -4.248985 0.112371 -37.812 < 2e-16 ***
## years2005-2006 -0.483375 0.322711 -1.498 0.13423
## years2007-2008 -0.310190 0.307622 -1.008 0.31334
## years2009-2010 -0.917674 0.301939 -3.039 0.00238 **
## years2013-2014 -0.232762 0.308071 -0.756 0.44996
## years2015-2016 -0.046285 0.312724 -0.148 0.88234
## years2017-2018 -0.300068 0.316288 -0.949 0.34281
## years2022-2012 -0.295770 0.320710 -0.922 0.35645
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.317 on 4982 degrees of freedom
## Multiple R-squared: 0.8887, Adjusted R-squared: 0.8883
## F-statistic: 2340 on 17 and 4982 DF, p-value: < 2.2e-16

# bs() terms for age (df = 7) and weight (default settings), with sex, linear
# height and `years`.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXWT) + RIAGENDR + BMXHT + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.6964 -3.2159 0.0507 3.2231 22.1043
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 129.76775 1.82364 71.159 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.54769 0.78600 -0.697 0.48596
## bs(RIDAGEYR, df = 7)2 0.64196 0.57620 1.114 0.26528
## bs(RIDAGEYR, df = 7)3 1.23408 0.62870 1.963 0.04971 *
## bs(RIDAGEYR, df = 7)4 3.87270 0.54571 7.097 1.46e-12 ***
## bs(RIDAGEYR, df = 7)5 6.42949 0.71140 9.038 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 8.97711 0.77641 11.562 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.67049 0.86683 10.003 < 2e-16 ***
## bs(BMXWT)1 65.27609 1.65983 39.327 < 2e-16 ***
## bs(BMXWT)2 96.09541 1.83650 52.325 < 2e-16 ***
## bs(BMXWT)3 126.68545 3.15856 40.109 < 2e-16 ***
## RIAGENDRMale 0.95514 0.20049 4.764 1.95e-06 ***
## BMXHT -0.45335 0.01096 -41.362 < 2e-16 ***
## years2005-2006 -0.39003 0.30957 -1.260 0.20777
## years2007-2008 -0.28987 0.29509 -0.982 0.32599
## years2009-2010 -0.79705 0.28982 -2.750 0.00598 **
## years2013-2014 -0.18996 0.29552 -0.643 0.52037
## years2015-2016 0.03255 0.30005 0.108 0.91362
## years2017-2018 -0.26765 0.30344 -0.882 0.37780
## years2022-2012 -0.25484 0.30768 -0.828 0.40756
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.1 on 4980 degrees of freedom
## Multiple R-squared: 0.8976, Adjusted R-squared: 0.8972
## F-statistic: 2298 on 19 and 4980 DF, p-value: < 2.2e-16

# Age, weight and height all entered through bs(..., df = 7), plus sex and
# `years`.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXWT,df=7) + RIAGENDR + bs(BMXHT,df=7) + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.9407 -3.1450 0.0111 3.2425 22.0717
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 69.20583 4.09863 16.885 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.51970 0.78663 -0.661 0.50886
## bs(RIDAGEYR, df = 7)2 0.57466 0.57704 0.996 0.31936
## bs(RIDAGEYR, df = 7)3 1.22967 0.62912 1.955 0.05069 .
## bs(RIDAGEYR, df = 7)4 3.86854 0.54646 7.079 1.65e-12 ***
## bs(RIDAGEYR, df = 7)5 6.39737 0.71200 8.985 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 9.00941 0.77700 11.595 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.69250 0.86743 10.021 < 2e-16 ***
## bs(BMXWT, df = 7)1 13.26019 3.18959 4.157 3.27e-05 ***
## bs(BMXWT, df = 7)2 24.68206 2.01210 12.267 < 2e-16 ***
## bs(BMXWT, df = 7)3 38.98781 2.32172 16.793 < 2e-16 ***
## bs(BMXWT, df = 7)4 47.84237 2.21391 21.610 < 2e-16 ***
## bs(BMXWT, df = 7)5 85.68573 2.49841 34.296 < 2e-16 ***
## bs(BMXWT, df = 7)6 109.51976 3.27675 33.423 < 2e-16 ***
## bs(BMXWT, df = 7)7 127.87759 4.51005 28.354 < 2e-16 ***
## RIAGENDRMale 0.90811 0.20772 4.372 1.26e-05 ***
## bs(BMXHT, df = 7)1 -7.08407 4.95934 -1.428 0.15323
## bs(BMXHT, df = 7)2 -11.08331 3.45580 -3.207 0.00135 **
## bs(BMXHT, df = 7)3 -15.99038 3.79332 -4.215 2.54e-05 ***
## bs(BMXHT, df = 7)4 -18.27354 3.68766 -4.955 7.46e-07 ***
## bs(BMXHT, df = 7)5 -26.10880 3.85155 -6.779 1.35e-11 ***
## bs(BMXHT, df = 7)6 -27.67786 3.95822 -6.993 3.06e-12 ***
## bs(BMXHT, df = 7)7 -41.77758 4.74141 -8.811 < 2e-16 ***
## years2005-2006 -0.36559 0.30980 -1.180 0.23802
## years2007-2008 -0.25477 0.29523 -0.863 0.38821
## years2009-2010 -0.76609 0.29000 -2.642 0.00828 **
## years2013-2014 -0.16200 0.29565 -0.548 0.58376
## years2015-2016 0.06028 0.30005 0.201 0.84080
## years2017-2018 -0.24868 0.30352 -0.819 0.41265
## years2022-2012 -0.23286 0.30778 -0.757 0.44935
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.098 on 4970 degrees of freedom
## Multiple R-squared: 0.8979, Adjusted R-squared: 0.8973
## F-statistic: 1507 on 29 and 4970 DF, p-value: < 2.2e-16

# Age spline interacted with sex: `*` expands to both main effects plus the
# age-by-sex interaction terms (see the `:RIAGENDRMale` rows below). Also
# includes bs() weight, linear height and `years`.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7)*RIAGENDR + bs(BMXWT) + BMXHT + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.9159 -3.1634 0.0325 3.2716 20.9161
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 131.07075 1.84352 71.098 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.69438 1.07827 -0.644 0.519624
## bs(RIDAGEYR, df = 7)2 -0.44677 0.78556 -0.569 0.569563
## bs(RIDAGEYR, df = 7)3 -0.63532 0.86754 -0.732 0.464003
## bs(RIDAGEYR, df = 7)4 1.63863 0.74599 2.197 0.028095 *
## bs(RIDAGEYR, df = 7)5 4.69405 0.96950 4.842 1.33e-06 ***
## bs(RIDAGEYR, df = 7)6 5.97096 1.05471 5.661 1.59e-08 ***
## bs(RIDAGEYR, df = 7)7 5.78590 1.14313 5.061 4.31e-07 ***
## RIAGENDRMale -2.81639 0.83360 -3.379 0.000734 ***
## bs(BMXWT)1 65.72118 1.64780 39.884 < 2e-16 ***
## bs(BMXWT)2 95.24128 1.82222 52.267 < 2e-16 ***
## bs(BMXWT)3 127.73706 3.13011 40.809 < 2e-16 ***
## BMXHT -0.45168 0.01086 -41.600 < 2e-16 ***
## years2005-2006 -0.48964 0.30654 -1.597 0.110261
## years2007-2008 -0.24037 0.29202 -0.823 0.410479
## years2009-2010 -0.76188 0.28687 -2.656 0.007938 **
## years2013-2014 -0.11919 0.29257 -0.407 0.683729
## years2015-2016 0.08282 0.29701 0.279 0.780367
## years2017-2018 -0.23332 0.30039 -0.777 0.437354
## years2022-2012 -0.22090 0.30462 -0.725 0.468389
## bs(RIDAGEYR, df = 7)1:RIAGENDRMale 0.89898 1.55932 0.577 0.564288
## bs(RIDAGEYR, df = 7)2:RIAGENDRMale 2.34148 1.14073 2.053 0.040162 *
## bs(RIDAGEYR, df = 7)3:RIAGENDRMale 4.26981 1.24393 3.433 0.000603 ***
## bs(RIDAGEYR, df = 7)4:RIAGENDRMale 4.74559 1.07827 4.401 1.10e-05 ***
## bs(RIDAGEYR, df = 7)5:RIAGENDRMale 4.22667 1.40496 3.008 0.002639 **
## bs(RIDAGEYR, df = 7)6:RIAGENDRMale 5.96284 1.52926 3.899 9.78e-05 ***
## bs(RIDAGEYR, df = 7)7:RIAGENDRMale 6.61731 1.70352 3.884 0.000104 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.045 on 4973 degrees of freedom
## Multiple R-squared: 0.9, Adjusted R-squared: 0.8995
## F-statistic: 1721 on 26 and 4973 DF, p-value: < 2.2e-16

# NOTE: the extra `+ RIAGENDR` is redundant -- the sex main effect is already
# part of `bs(RIDAGEYR, df = 7)*RIAGENDR`. This therefore fits the same model
# as the call without it, and the summary below matches that one exactly.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7)*RIAGENDR + bs(BMXWT) + RIAGENDR+ BMXHT + years")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.9159 -3.1634 0.0325 3.2716 20.9161
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 131.07075 1.84352 71.098 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.69438 1.07827 -0.644 0.519624
## bs(RIDAGEYR, df = 7)2 -0.44677 0.78556 -0.569 0.569563
## bs(RIDAGEYR, df = 7)3 -0.63532 0.86754 -0.732 0.464003
## bs(RIDAGEYR, df = 7)4 1.63863 0.74599 2.197 0.028095 *
## bs(RIDAGEYR, df = 7)5 4.69405 0.96950 4.842 1.33e-06 ***
## bs(RIDAGEYR, df = 7)6 5.97096 1.05471 5.661 1.59e-08 ***
## bs(RIDAGEYR, df = 7)7 5.78590 1.14313 5.061 4.31e-07 ***
## RIAGENDRMale -2.81639 0.83360 -3.379 0.000734 ***
## bs(BMXWT)1 65.72118 1.64780 39.884 < 2e-16 ***
## bs(BMXWT)2 95.24128 1.82222 52.267 < 2e-16 ***
## bs(BMXWT)3 127.73706 3.13011 40.809 < 2e-16 ***
## BMXHT -0.45168 0.01086 -41.600 < 2e-16 ***
## years2005-2006 -0.48964 0.30654 -1.597 0.110261
## years2007-2008 -0.24037 0.29202 -0.823 0.410479
## years2009-2010 -0.76188 0.28687 -2.656 0.007938 **
## years2013-2014 -0.11919 0.29257 -0.407 0.683729
## years2015-2016 0.08282 0.29701 0.279 0.780367
## years2017-2018 -0.23332 0.30039 -0.777 0.437354
## years2022-2012 -0.22090 0.30462 -0.725 0.468389
## bs(RIDAGEYR, df = 7)1:RIAGENDRMale 0.89898 1.55932 0.577 0.564288
## bs(RIDAGEYR, df = 7)2:RIAGENDRMale 2.34148 1.14073 2.053 0.040162 *
## bs(RIDAGEYR, df = 7)3:RIAGENDRMale 4.26981 1.24393 3.433 0.000603 ***
## bs(RIDAGEYR, df = 7)4:RIAGENDRMale 4.74559 1.07827 4.401 1.10e-05 ***
## bs(RIDAGEYR, df = 7)5:RIAGENDRMale 4.22667 1.40496 3.008 0.002639 **
## bs(RIDAGEYR, df = 7)6:RIAGENDRMale 5.96284 1.52926 3.899 9.78e-05 ***
## bs(RIDAGEYR, df = 7)7:RIAGENDRMale 6.61731 1.70352 3.884 0.000104 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.045 on 4973 degrees of freedom
## Multiple R-squared: 0.9, Adjusted R-squared: 0.8995
## F-statistic: 1721 on 26 and 4973 DF, p-value: < 2.2e-16

# Age-by-sex interaction with bs() weight and linear height; no `years` term.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7)*RIAGENDR + bs(BMXWT) + BMXHT")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.8866 -3.1477 0.0247 3.2572 20.6844
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 130.83953 1.81340 72.152 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.67331 1.07829 -0.624 0.532381
## bs(RIDAGEYR, df = 7)2 -0.47603 0.78570 -0.606 0.544628
## bs(RIDAGEYR, df = 7)3 -0.57909 0.86737 -0.668 0.504393
## bs(RIDAGEYR, df = 7)4 1.60258 0.74453 2.152 0.031408 *
## bs(RIDAGEYR, df = 7)5 4.78568 0.96732 4.947 7.77e-07 ***
## bs(RIDAGEYR, df = 7)6 5.90639 1.05093 5.620 2.01e-08 ***
## bs(RIDAGEYR, df = 7)7 5.90886 1.13554 5.204 2.03e-07 ***
## RIAGENDRMale -2.79536 0.83363 -3.353 0.000805 ***
## bs(BMXWT)1 65.84560 1.64746 39.968 < 2e-16 ***
## bs(BMXWT)2 95.17391 1.82101 52.264 < 2e-16 ***
## bs(BMXWT)3 127.80215 3.13060 40.824 < 2e-16 ***
## BMXHT -0.45211 0.01081 -41.831 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1:RIAGENDRMale 0.87658 1.55938 0.562 0.574052
## bs(RIDAGEYR, df = 7)2:RIAGENDRMale 2.33369 1.14076 2.046 0.040835 *
## bs(RIDAGEYR, df = 7)3:RIAGENDRMale 4.20345 1.24400 3.379 0.000733 ***
## bs(RIDAGEYR, df = 7)4:RIAGENDRMale 4.75758 1.07739 4.416 1.03e-05 ***
## bs(RIDAGEYR, df = 7)5:RIAGENDRMale 4.12616 1.40464 2.938 0.003324 **
## bs(RIDAGEYR, df = 7)6:RIAGENDRMale 5.96003 1.52891 3.898 9.82e-05 ***
## bs(RIDAGEYR, df = 7)7:RIAGENDRMale 6.54486 1.70422 3.840 0.000124 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.048 on 4980 degrees of freedom
## Multiple R-squared: 0.8997, Adjusted R-squared: 0.8994
## F-statistic: 2352 on 19 and 4980 DF, p-value: < 2.2e-16

# grid.arrange(g1, g2,g3, nrow=3)
# Regression models: with BMI ----
# BMXBMI as the only predictor, entered linearly.
run_model("BMXWAIST ~ BMXBMI")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.280 -4.613 -0.013 4.764 23.156
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 36.58903 0.44626 81.99 <2e-16 ***
## BMXBMI 2.15998 0.01497 144.29 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.001 on 4998 degrees of freedom
## Multiple R-squared: 0.8064, Adjusted R-squared: 0.8064
## F-statistic: 2.082e+04 on 1 and 4998 DF, p-value: < 2.2e-16

# Linear weight and linear height only.
run_model("BMXWAIST ~ BMXWT + BMXHT")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.808 -3.997 0.020 4.072 20.894
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 108.778055 1.476754 73.66 <2e-16 ***
## BMXWT 0.777885 0.004649 167.33 <2e-16 ***
## BMXHT -0.435119 0.009621 -45.23 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.084 on 4997 degrees of freedom
## Multiple R-squared: 0.8538, Adjusted R-squared: 0.8538
## F-statistic: 1.459e+04 on 2 and 4997 DF, p-value: < 2.2e-16

# Linear weight, height and BMXBMI together.
# NOTE(review): BMXBMI is presumably derived from weight and height, so the
# three predictors are strongly related -- interpret individual coefficients
# with care; confirm how BMXBMI is computed.
run_model("BMXWAIST ~ BMXWT + BMXHT + BMXBMI")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.238 -3.981 -0.001 4.113 20.670
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.24995 6.41356 10.486 < 2e-16 ***
## BMXWT 0.52600 0.03815 13.789 < 2e-16 ***
## BMXHT -0.18744 0.03845 -4.875 1.12e-06 ***
## BMXBMI 0.71029 0.10677 6.652 3.20e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.058 on 4996 degrees of freedom
## Multiple R-squared: 0.8551, Adjusted R-squared: 0.855
## F-statistic: 9829 on 3 and 4996 DF, p-value: < 2.2e-16

# BMXBMI entered through bs(BMXBMI, df = 7).
# This run ends with a bs() warning about 'x' values beyond the boundary
# knots (see below). NOTE(review): presumably raised during prediction because
# some evaluation-set BMXBMI values lie outside the range seen when fitting --
# confirm, and treat predictions for those rows with caution.
run_model("BMXWAIST ~ bs(BMXBMI,df=7)")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.264 -4.468 -0.052 4.572 25.709
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 69.032 3.675 18.782 < 2e-16 ***
## bs(BMXBMI, df = 7)1 -4.366 5.130 -0.851 0.394805
## bs(BMXBMI, df = 7)2 12.024 3.399 3.537 0.000408 ***
## bs(BMXBMI, df = 7)3 25.591 3.781 6.769 1.45e-11 ***
## bs(BMXBMI, df = 7)4 34.191 3.655 9.356 < 2e-16 ***
## bs(BMXBMI, df = 7)5 61.130 3.945 15.494 < 2e-16 ***
## bs(BMXBMI, df = 7)6 82.407 4.458 18.486 < 2e-16 ***
## bs(BMXBMI, df = 7)7 92.484 5.182 17.846 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.776 on 4992 degrees of freedom
## Multiple R-squared: 0.8189, Adjusted R-squared: 0.8186
## F-statistic: 3225 on 7 and 4992 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# bs(BMXBMI, df = 7) interacted with sex (main effects plus interaction).
run_model("BMXWAIST ~ bs(BMXBMI,df=7)*RIAGENDR")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.3432 -4.1309 -0.0958 4.0150 22.6188
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 68.750 4.090 16.810 < 2e-16 ***
## bs(BMXBMI, df = 7)1 -6.653 5.677 -1.172 0.24127
## bs(BMXBMI, df = 7)2 12.324 3.822 3.225 0.00127 **
## bs(BMXBMI, df = 7)3 23.070 4.213 5.476 4.57e-08 ***
## bs(BMXBMI, df = 7)4 31.057 4.076 7.619 3.04e-14 ***
## bs(BMXBMI, df = 7)5 58.816 4.393 13.389 < 2e-16 ***
## bs(BMXBMI, df = 7)6 75.125 4.935 15.223 < 2e-16 ***
## bs(BMXBMI, df = 7)7 92.387 5.432 17.008 < 2e-16 ***
## RIAGENDRMale -2.168 6.960 -0.312 0.75543
## bs(BMXBMI, df = 7)1:RIAGENDRMale 9.247 9.752 0.948 0.34303
## bs(BMXBMI, df = 7)2:RIAGENDRMale 2.494 6.411 0.389 0.69727
## bs(BMXBMI, df = 7)3:RIAGENDRMale 7.245 7.158 1.012 0.31151
## bs(BMXBMI, df = 7)4:RIAGENDRMale 8.070 6.914 1.167 0.24318
## bs(BMXBMI, df = 7)5:RIAGENDRMale 11.322 7.490 1.512 0.13070
## bs(BMXBMI, df = 7)6:RIAGENDRMale 26.310 8.647 3.043 0.00236 **
## bs(BMXBMI, df = 7)7:RIAGENDRMale 16.355 11.538 1.417 0.15640
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.084 on 4984 degrees of freedom
## Multiple R-squared: 0.8542, Adjusted R-squared: 0.8538
## F-statistic: 1947 on 15 and 4984 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# bs() age term (df = 7) added to the BMXBMI-spline-by-sex interaction.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.6971 -3.6161 -0.1469 3.5118 23.6668
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 63.1146 3.8170 16.535 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.4781 0.8694 -0.550 0.58240
## bs(RIDAGEYR, df = 7)2 0.6127 0.6378 0.961 0.33676
## bs(RIDAGEYR, df = 7)3 1.2412 0.6948 1.786 0.07409 .
## bs(RIDAGEYR, df = 7)4 3.4396 0.6027 5.707 1.22e-08 ***
## bs(RIDAGEYR, df = 7)5 5.6256 0.7841 7.175 8.31e-13 ***
## bs(RIDAGEYR, df = 7)6 6.8470 0.8534 8.023 1.27e-15 ***
## bs(RIDAGEYR, df = 7)7 7.2470 0.9469 7.654 2.33e-14 ***
## bs(BMXBMI, df = 7)1 -1.7501 5.2626 -0.333 0.73948
## bs(BMXBMI, df = 7)2 15.0842 3.5419 4.259 2.09e-05 ***
## bs(BMXBMI, df = 7)3 25.8088 3.9040 6.611 4.22e-11 ***
## bs(BMXBMI, df = 7)4 33.5146 3.7771 8.873 < 2e-16 ***
## bs(BMXBMI, df = 7)5 62.3744 4.0721 15.318 < 2e-16 ***
## bs(BMXBMI, df = 7)6 77.9231 4.5742 17.035 < 2e-16 ***
## bs(BMXBMI, df = 7)7 96.7677 5.0343 19.222 < 2e-16 ***
## RIAGENDRMale 0.7498 6.4512 0.116 0.90748
## bs(BMXBMI, df = 7)1:RIAGENDRMale 3.8685 9.0428 0.428 0.66882
## bs(BMXBMI, df = 7)2:RIAGENDRMale 0.5399 5.9413 0.091 0.92759
## bs(BMXBMI, df = 7)3:RIAGENDRMale 4.1655 6.6354 0.628 0.53018
## bs(BMXBMI, df = 7)4:RIAGENDRMale 5.2843 6.4082 0.825 0.40963
## bs(BMXBMI, df = 7)5:RIAGENDRMale 8.2247 6.9439 1.184 0.23629
## bs(BMXBMI, df = 7)6:RIAGENDRMale 22.7391 8.0123 2.838 0.00456 **
## bs(BMXBMI, df = 7)7:RIAGENDRMale 16.0643 10.7015 1.501 0.13339
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.635 on 4977 degrees of freedom
## Multiple R-squared: 0.8751, Adjusted R-squared: 0.8746
## F-statistic: 1585 on 22 and 4977 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# BMXBMI-spline-by-sex interaction with age entered through bs() at default
# settings (three basis coefficients) rather than df = 7.
run_model("BMXWAIST ~ bs(RIDAGEYR) + bs(BMXBMI,df=7)*RIAGENDR")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.8047 -3.6006 -0.1526 3.5137 23.5800
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.97722 3.80109 16.568 < 2e-16 ***
## bs(RIDAGEYR)1 -0.08702 0.87789 -0.099 0.92104
## bs(RIDAGEYR)2 6.13320 0.65206 9.406 < 2e-16 ***
## bs(RIDAGEYR)3 7.34483 0.57263 12.827 < 2e-16 ***
## bs(BMXBMI, df = 7)1 -1.74500 5.25995 -0.332 0.74009
## bs(BMXBMI, df = 7)2 15.11513 3.53966 4.270 1.99e-05 ***
## bs(BMXBMI, df = 7)3 25.83294 3.90209 6.620 3.96e-11 ***
## bs(BMXBMI, df = 7)4 33.54225 3.77525 8.885 < 2e-16 ***
## bs(BMXBMI, df = 7)5 62.38433 4.07017 15.327 < 2e-16 ***
## bs(BMXBMI, df = 7)6 77.98325 4.57071 17.062 < 2e-16 ***
## bs(BMXBMI, df = 7)7 96.78097 5.03224 19.232 < 2e-16 ***
## RIAGENDRMale 0.61459 6.44595 0.095 0.92404
## bs(BMXBMI, df = 7)1:RIAGENDRMale 4.09644 9.03350 0.453 0.65023
## bs(BMXBMI, df = 7)2:RIAGENDRMale 0.63747 5.93654 0.107 0.91449
## bs(BMXBMI, df = 7)3:RIAGENDRMale 4.30260 6.63011 0.649 0.51640
## bs(BMXBMI, df = 7)4:RIAGENDRMale 5.41220 6.40329 0.845 0.39803
## bs(BMXBMI, df = 7)5:RIAGENDRMale 8.38355 6.93762 1.208 0.22694
## bs(BMXBMI, df = 7)6:RIAGENDRMale 22.78357 8.00740 2.845 0.00445 **
## bs(BMXBMI, df = 7)7:RIAGENDRMale 16.29167 10.68503 1.525 0.12739
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.633 on 4981 degrees of freedom
## Multiple R-squared: 0.8751, Adjusted R-squared: 0.8746
## F-statistic: 1939 on 18 and 4981 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# bs() age term (df = 7), BMXBMI-spline-by-sex interaction, and linear height.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXHT")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.8562 -3.2529 -0.0377 3.2791 21.4987
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.78012 3.87207 1.493 0.13556
## bs(RIDAGEYR, df = 7)1 -0.49713 0.78783 -0.631 0.52807
## bs(RIDAGEYR, df = 7)2 0.59545 0.57793 1.030 0.30291
## bs(RIDAGEYR, df = 7)3 1.20719 0.62963 1.917 0.05526 .
## bs(RIDAGEYR, df = 7)4 3.78504 0.54624 6.929 4.77e-12 ***
## bs(RIDAGEYR, df = 7)5 6.40054 0.71089 9.004 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 8.82192 0.77564 11.374 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.95609 0.85960 10.419 < 2e-16 ***
## bs(BMXBMI, df = 7)1 3.37323 4.77134 0.707 0.47961
## bs(BMXBMI, df = 7)2 17.72547 3.21050 5.521 3.54e-08 ***
## bs(BMXBMI, df = 7)3 29.65608 3.53964 8.378 < 2e-16 ***
## bs(BMXBMI, df = 7)4 37.29638 3.42463 10.891 < 2e-16 ***
## bs(BMXBMI, df = 7)5 65.79094 3.69142 17.823 < 2e-16 ***
## bs(BMXBMI, df = 7)6 82.09602 4.14691 19.797 < 2e-16 ***
## bs(BMXBMI, df = 7)7 98.23367 4.56211 21.533 < 2e-16 ***
## RIAGENDRMale -1.83410 5.84637 -0.314 0.75375
## BMXHT 0.33234 0.01009 32.941 < 2e-16 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale 2.45790 8.19433 0.300 0.76423
## bs(BMXBMI, df = 7)2:RIAGENDRMale -0.85085 5.38398 -0.158 0.87444
## bs(BMXBMI, df = 7)3:RIAGENDRMale 2.16509 6.01304 0.360 0.71881
## bs(BMXBMI, df = 7)4:RIAGENDRMale 3.12682 5.80723 0.538 0.59030
## bs(BMXBMI, df = 7)5:RIAGENDRMale 6.20527 6.29258 0.986 0.32412
## bs(BMXBMI, df = 7)6:RIAGENDRMale 18.84517 7.26143 2.595 0.00948 **
## bs(BMXBMI, df = 7)7:RIAGENDRMale 12.65650 9.69784 1.305 0.19192
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.106 on 4976 degrees of freedom
## Multiple R-squared: 0.8975, Adjusted R-squared: 0.897
## F-statistic: 1894 on 23 and 4976 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# bs() age term (df = 7), BMXBMI-spline-by-sex interaction, and height entered
# through bs() at default settings.
run_model("BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + bs(BMXHT)")
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.0082 -3.2273 -0.0345 3.2790 21.4527
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 50.2740 3.7142 13.536 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.4773 0.7873 -0.606 0.54440
## bs(RIDAGEYR, df = 7)2 0.5362 0.5778 0.928 0.35350
## bs(RIDAGEYR, df = 7)3 1.2016 0.6294 1.909 0.05630 .
## bs(RIDAGEYR, df = 7)4 3.7778 0.5459 6.920 5.08e-12 ***
## bs(RIDAGEYR, df = 7)5 6.3721 0.7104 8.969 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 8.8152 0.7752 11.371 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.9829 0.8590 10.457 < 2e-16 ***
## bs(BMXBMI, df = 7)1 3.6833 4.7689 0.772 0.43993
## bs(BMXBMI, df = 7)2 17.9075 3.2087 5.581 2.52e-08 ***
## bs(BMXBMI, df = 7)3 29.9117 3.5381 8.454 < 2e-16 ***
## bs(BMXBMI, df = 7)4 37.5515 3.4231 10.970 < 2e-16 ***
## bs(BMXBMI, df = 7)5 66.0049 3.6894 17.890 < 2e-16 ***
## bs(BMXBMI, df = 7)6 82.3773 4.1449 19.874 < 2e-16 ***
## bs(BMXBMI, df = 7)7 98.2841 4.5588 21.559 < 2e-16 ***
## RIAGENDRMale -1.9749 5.8433 -0.338 0.73540
## bs(BMXHT)1 6.2055 3.0064 2.064 0.03906 *
## bs(BMXHT)2 18.8865 1.2254 15.413 < 2e-16 ***
## bs(BMXHT)3 19.4071 2.6173 7.415 1.42e-13 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale 2.4877 8.1889 0.304 0.76130
## bs(BMXBMI, df = 7)2:RIAGENDRMale -0.6946 5.3808 -0.129 0.89729
## bs(BMXBMI, df = 7)3:RIAGENDRMale 2.1814 6.0090 0.363 0.71661
## bs(BMXBMI, df = 7)4:RIAGENDRMale 3.1559 5.8034 0.544 0.58660
## bs(BMXBMI, df = 7)5:RIAGENDRMale 6.2933 6.2882 1.001 0.31697
## bs(BMXBMI, df = 7)6:RIAGENDRMale 19.0439 7.2574 2.624 0.00872 **
## bs(BMXBMI, df = 7)7:RIAGENDRMale 13.1318 9.6920 1.355 0.17551
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.103 on 4974 degrees of freedom
## Multiple R-squared: 0.8977, Adjusted R-squared: 0.8972
## F-statistic: 1745 on 25 and 4974 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Variant: linear body weight (BMXWT) as the extra covariate, no BMXHT term.
run_model(
  formula_str = "BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT"
)
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.4531 -3.2040 0.0178 3.2398 21.5454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.88262 3.46399 14.112 < 2e-16 ***
## bs(RIDAGEYR, df = 7)1 -0.53179 0.78323 -0.679 0.49718
## bs(RIDAGEYR, df = 7)2 0.60611 0.57455 1.055 0.29150
## bs(RIDAGEYR, df = 7)3 1.10998 0.62595 1.773 0.07625 .
## bs(RIDAGEYR, df = 7)4 3.80219 0.54305 7.002 2.87e-12 ***
## bs(RIDAGEYR, df = 7)5 6.36138 0.70668 9.002 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 8.81049 0.77097 11.428 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.67978 0.85405 10.163 < 2e-16 ***
## bs(BMXBMI, df = 7)1 -1.63954 4.74092 -0.346 0.72949
## bs(BMXBMI, df = 7)2 9.63609 3.19475 3.016 0.00257 **
## bs(BMXBMI, df = 7)3 16.44795 3.52778 4.662 3.21e-06 ***
## bs(BMXBMI, df = 7)4 21.27040 3.42168 6.216 5.50e-10 ***
## bs(BMXBMI, df = 7)5 37.43621 3.74096 10.007 < 2e-16 ***
## bs(BMXBMI, df = 7)6 43.30599 4.24458 10.203 < 2e-16 ***
## bs(BMXBMI, df = 7)7 46.22252 4.77252 9.685 < 2e-16 ***
## RIAGENDRMale -0.55529 5.81179 -0.096 0.92389
## BMXWT 0.34107 0.01003 34.010 < 2e-16 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale 2.98931 8.14634 0.367 0.71367
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.11226 5.35255 -0.208 0.83539
## bs(BMXBMI, df = 7)3:RIAGENDRMale 1.34971 5.97815 0.226 0.82139
## bs(BMXBMI, df = 7)4:RIAGENDRMale 1.68279 5.77388 0.291 0.77072
## bs(BMXBMI, df = 7)5:RIAGENDRMale 2.76426 6.25755 0.442 0.65869
## bs(BMXBMI, df = 7)6:RIAGENDRMale 11.91382 7.22503 1.649 0.09922 .
## bs(BMXBMI, df = 7)7:RIAGENDRMale 2.36852 9.64899 0.245 0.80610
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.077 on 4976 degrees of freedom
## Multiple R-squared: 0.8987, Adjusted R-squared: 0.8982
## F-statistic: 1919 on 23 and 4976 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Variant: B-spline terms for both BMXWT and BMXHT on top of the
# RIDAGEYR spline and the BMXBMI-spline-by-RIAGENDR interaction.
run_model(
  formula_str = "BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + bs(BMXWT) + bs(BMXHT)"
)
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.7818 -3.2043 -0.0148 3.2379 21.4550
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.6336 12.3622 4.258 2.1e-05 ***
## bs(RIDAGEYR, df = 7)1 -0.5191 0.7824 -0.663 0.5071
## bs(RIDAGEYR, df = 7)2 0.5124 0.5744 0.892 0.3723
## bs(RIDAGEYR, df = 7)3 1.0835 0.6258 1.732 0.0834 .
## bs(RIDAGEYR, df = 7)4 3.7774 0.5426 6.962 3.8e-12 ***
## bs(RIDAGEYR, df = 7)5 6.3173 0.7059 8.949 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 8.7881 0.7703 11.408 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.6950 0.8542 10.179 < 2e-16 ***
## bs(BMXBMI, df = 7)1 3.1605 8.1146 0.389 0.6969
## bs(BMXBMI, df = 7)2 18.5216 13.1601 1.407 0.1594
## bs(BMXBMI, df = 7)3 29.6260 19.4960 1.520 0.1287
## bs(BMXBMI, df = 7)4 36.8089 23.0453 1.597 0.1103
## bs(BMXBMI, df = 7)5 61.2786 36.1213 1.696 0.0899 .
## bs(BMXBMI, df = 7)6 69.8472 41.2720 1.692 0.0906 .
## bs(BMXBMI, df = 7)7 76.9340 47.8398 1.608 0.1079
## RIAGENDRMale -0.8923 5.8260 -0.153 0.8783
## bs(BMXWT)1 -6.7810 39.8696 -0.170 0.8650
## bs(BMXWT)2 19.2442 37.7508 0.510 0.6102
## bs(BMXWT)3 24.6989 56.9119 0.434 0.6643
## bs(BMXHT)1 5.7762 10.3515 0.558 0.5769
## bs(BMXHT)2 16.2061 18.0580 0.897 0.3695
## bs(BMXHT)3 11.6001 24.9546 0.465 0.6421
## bs(BMXBMI, df = 7)1:RIAGENDRMale 3.1021 8.1369 0.381 0.7030
## bs(BMXBMI, df = 7)2:RIAGENDRMale -0.6639 5.3629 -0.124 0.9015
## bs(BMXBMI, df = 7)3:RIAGENDRMale 1.6340 5.9965 0.272 0.7853
## bs(BMXBMI, df = 7)4:RIAGENDRMale 1.8860 5.7944 0.325 0.7448
## bs(BMXBMI, df = 7)5:RIAGENDRMale 2.8524 6.2825 0.454 0.6498
## bs(BMXBMI, df = 7)6:RIAGENDRMale 10.6235 7.3949 1.437 0.1509
## bs(BMXBMI, df = 7)7:RIAGENDRMale 8.7937 12.9829 0.677 0.4982
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.07 on 4971 degrees of freedom
## Multiple R-squared: 0.899, Adjusted R-squared: 0.8985
## F-statistic: 1581 on 28 and 4971 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Variant: linear BMXWT and BMXHT plus the `years` variable as an
# additional predictor.
run_model(
  formula_str = "BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT + BMXHT + years"
)
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.4946 -3.2323 0.0243 3.2239 21.6942
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.784045 6.772695 7.203 6.77e-13 ***
## bs(RIDAGEYR, df = 7)1 -0.551260 0.782943 -0.704 0.48141
## bs(RIDAGEYR, df = 7)2 0.637920 0.574395 1.111 0.26680
## bs(RIDAGEYR, df = 7)3 1.084544 0.626184 1.732 0.08334 .
## bs(RIDAGEYR, df = 7)4 3.840357 0.543774 7.062 1.86e-12 ***
## bs(RIDAGEYR, df = 7)5 6.300188 0.708492 8.892 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 8.903066 0.774289 11.498 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.559773 0.864535 9.901 < 2e-16 ***
## bs(BMXBMI, df = 7)1 -1.433239 4.788154 -0.299 0.76470
## bs(BMXBMI, df = 7)2 9.667675 3.357950 2.879 0.00401 **
## bs(BMXBMI, df = 7)3 16.586470 3.910158 4.242 2.26e-05 ***
## bs(BMXBMI, df = 7)4 21.393432 3.984290 5.369 8.26e-08 ***
## bs(BMXBMI, df = 7)5 37.616719 5.184384 7.256 4.61e-13 ***
## bs(BMXBMI, df = 7)6 43.599449 6.487478 6.721 2.01e-11 ***
## bs(BMXBMI, df = 7)7 46.445105 8.107393 5.729 1.07e-08 ***
## RIAGENDRMale -0.449955 5.814097 -0.077 0.93832
## BMXWT 0.339038 0.044051 7.696 1.68e-14 ***
## BMXHT 0.002447 0.044061 0.056 0.95571
## years2005-2006 -0.411412 0.308202 -1.335 0.18198
## years2007-2008 -0.322565 0.293889 -1.098 0.27244
## years2009-2010 -0.821936 0.288420 -2.850 0.00439 **
## years2013-2014 -0.164556 0.294160 -0.559 0.57591
## years2015-2016 0.054766 0.298569 0.183 0.85447
## years2017-2018 -0.210854 0.302186 -0.698 0.48536
## years2022-2012 -0.220935 0.306265 -0.721 0.47071
## bs(BMXBMI, df = 7)1:RIAGENDRMale 2.750295 8.146386 0.338 0.73567
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.200781 5.352091 -0.224 0.82249
## bs(BMXBMI, df = 7)3:RIAGENDRMale 1.226864 5.977941 0.205 0.83740
## bs(BMXBMI, df = 7)4:RIAGENDRMale 1.587317 5.775753 0.275 0.78346
## bs(BMXBMI, df = 7)5:RIAGENDRMale 2.764122 6.270469 0.441 0.65937
## bs(BMXBMI, df = 7)6:RIAGENDRMale 11.775246 7.272313 1.619 0.10547
## bs(BMXBMI, df = 7)7:RIAGENDRMale 2.670857 9.731143 0.274 0.78374
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.074 on 4968 degrees of freedom
## Multiple R-squared: 0.899, Adjusted R-squared: 0.8983
## F-statistic: 1426 on 31 and 4968 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# base model: bs(RIDAGEYR) + bs(BMXBMI) interacted with RIAGENDR, plus
# linear BMXWT and BMXHT. The same right-hand side is reused as the
# prefix for the single-covariate models that follow.
run_model(
  formula_str = "BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT + BMXHT"
)
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.4478 -3.2060 0.0143 3.2382 21.5451
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.422415 6.767358 7.155 9.56e-13 ***
## bs(RIDAGEYR, df = 7)1 -0.531457 0.783318 -0.678 0.49751
## bs(RIDAGEYR, df = 7)2 0.605997 0.574605 1.055 0.29165
## bs(RIDAGEYR, df = 7)3 1.110928 0.626132 1.774 0.07608 .
## bs(RIDAGEYR, df = 7)4 3.802206 0.543100 7.001 2.88e-12 ***
## bs(RIDAGEYR, df = 7)5 6.362188 0.706825 9.001 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 8.811666 0.771186 11.426 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.683451 0.855397 10.151 < 2e-16 ***
## bs(BMXBMI, df = 7)1 -1.586848 4.787880 -0.331 0.74033
## bs(BMXBMI, df = 7)2 9.718095 3.358780 2.893 0.00383 **
## bs(BMXBMI, df = 7)3 16.581586 3.911192 4.240 2.28e-05 ***
## bs(BMXBMI, df = 7)4 21.432070 3.985095 5.378 7.87e-08 ***
## bs(BMXBMI, df = 7)5 37.720486 5.185866 7.274 4.04e-13 ***
## bs(BMXBMI, df = 7)6 43.694622 6.490055 6.733 1.86e-11 ***
## bs(BMXBMI, df = 7)7 46.741394 8.108147 5.765 8.67e-09 ***
## RIAGENDRMale -0.569421 5.815111 -0.098 0.92200
## BMXWT 0.337670 0.044072 7.662 2.19e-14 ***
## BMXHT 0.003489 0.044078 0.079 0.93691
## bs(BMXBMI, df = 7)1:RIAGENDRMale 2.983259 8.147514 0.366 0.71426
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.110409 5.353139 -0.207 0.83568
## bs(BMXBMI, df = 7)3:RIAGENDRMale 1.356751 5.979405 0.227 0.82051
## bs(BMXBMI, df = 7)4:RIAGENDRMale 1.696008 5.776870 0.294 0.76909
## bs(BMXBMI, df = 7)5:RIAGENDRMale 2.797453 6.272203 0.446 0.65561
## bs(BMXBMI, df = 7)6:RIAGENDRMale 11.980762 7.275066 1.647 0.09966 .
## bs(BMXBMI, df = 7)7:RIAGENDRMale 2.469160 9.733345 0.254 0.79975
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.077 on 4975 degrees of freedom
## Multiple R-squared: 0.8987, Adjusted R-squared: 0.8982
## F-statistic: 1838 on 24 and 4975 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# grid.arrange(g1, g2,g3, nrow=3)
regression models with additional variables
# Base-model formula used as a prefix: the trailing " + " is deliberate so
# that each call can append exactly one extra predictor with paste0().
base_form <- "BMXWAIST ~ bs(RIDAGEYR, df = 7) + bs(BMXBMI,df=7)*RIAGENDR + BMXWT + BMXHT + "
# MFA 16:1 (Hexadecenoic) (gm)
paste0(base_form, "DR1TM161") |>
  run_model()
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.2867 -3.2198 0.0061 3.2529 21.5415
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.587258 6.767025 7.180 8.00e-13 ***
## bs(RIDAGEYR, df = 7)1 -0.508330 0.783321 -0.649 0.51641
## bs(RIDAGEYR, df = 7)2 0.602174 0.574517 1.048 0.29463
## bs(RIDAGEYR, df = 7)3 1.111861 0.626031 1.776 0.07579 .
## bs(RIDAGEYR, df = 7)4 3.795480 0.543028 6.989 3.12e-12 ***
## bs(RIDAGEYR, df = 7)5 6.315843 0.707290 8.930 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 8.768631 0.771519 11.365 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.644418 0.855599 10.103 < 2e-16 ***
## bs(BMXBMI, df = 7)1 -1.693851 4.787559 -0.354 0.72350
## bs(BMXBMI, df = 7)2 9.612337 3.358871 2.862 0.00423 **
## bs(BMXBMI, df = 7)3 16.453496 3.911358 4.207 2.64e-05 ***
## bs(BMXBMI, df = 7)4 21.321622 3.985032 5.350 9.17e-08 ***
## bs(BMXBMI, df = 7)5 37.561334 5.185957 7.243 5.07e-13 ***
## bs(BMXBMI, df = 7)6 43.526103 6.489837 6.707 2.21e-11 ***
## bs(BMXBMI, df = 7)7 46.480074 8.108438 5.732 1.05e-08 ***
## RIAGENDRMale -0.725807 5.814969 -0.125 0.90067
## BMXWT 0.338758 0.044070 7.687 1.81e-14 ***
## BMXHT 0.003619 0.044071 0.082 0.93456
## DR1TM161 -0.137338 0.084893 -1.618 0.10577
## bs(BMXBMI, df = 7)1:RIAGENDRMale 3.251670 8.147879 0.399 0.68985
## bs(BMXBMI, df = 7)2:RIAGENDRMale -0.938861 5.353319 -0.175 0.86079
## bs(BMXBMI, df = 7)3:RIAGENDRMale 1.570757 5.979897 0.263 0.79281
## bs(BMXBMI, df = 7)4:RIAGENDRMale 1.873999 5.776979 0.324 0.74566
## bs(BMXBMI, df = 7)5:RIAGENDRMale 3.075553 6.273539 0.490 0.62398
## bs(BMXBMI, df = 7)6:RIAGENDRMale 12.024983 7.273936 1.653 0.09836 .
## bs(BMXBMI, df = 7)7:RIAGENDRMale 2.975277 9.736791 0.306 0.75994
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.076 on 4974 degrees of freedom
## Multiple R-squared: 0.8987, Adjusted R-squared: 0.8982
## F-statistic: 1766 on 25 and 4974 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# Distribution of DR1TM161 on its original scale.
hist(data$DR1TM161)

# Distribution of the same variable after invNorm().
# NOTE(review): invNorm is defined outside this section; presumably an
# inverse-normal (rank-based) transform -- confirm against its definition.
hist(invNorm(data$DR1TM161))

# Same extra covariate as the DR1TM161 model, but passed through invNorm()
# inside the formula.
paste0(base_form, "invNorm(DR1TM161)") |>
  run_model()
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.2578 -3.2162 0.0063 3.2431 21.5752
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.408101 6.765339 7.155 9.56e-13 ***
## bs(RIDAGEYR, df = 7)1 -0.491274 0.783343 -0.627 0.53059
## bs(RIDAGEYR, df = 7)2 0.593253 0.574469 1.033 0.30180
## bs(RIDAGEYR, df = 7)3 1.121506 0.625968 1.792 0.07325 .
## bs(RIDAGEYR, df = 7)4 3.795222 0.542949 6.990 3.11e-12 ***
## bs(RIDAGEYR, df = 7)5 6.319020 0.706945 8.938 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 8.760815 0.771377 11.357 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.634479 0.855494 10.093 < 2e-16 ***
## bs(BMXBMI, df = 7)1 -1.817780 4.787851 -0.380 0.70421
## bs(BMXBMI, df = 7)2 9.554625 3.358778 2.845 0.00446 **
## bs(BMXBMI, df = 7)3 16.384344 3.911275 4.189 2.85e-05 ***
## bs(BMXBMI, df = 7)4 21.262094 3.984816 5.336 9.94e-08 ***
## bs(BMXBMI, df = 7)5 37.495251 5.185547 7.231 5.54e-13 ***
## bs(BMXBMI, df = 7)6 43.484731 6.488970 6.701 2.29e-11 ***
## bs(BMXBMI, df = 7)7 46.382756 8.107720 5.721 1.12e-08 ***
## RIAGENDRMale -0.781669 5.814348 -0.134 0.89306
## BMXWT 0.338766 0.044063 7.688 1.79e-14 ***
## BMXHT 0.004078 0.044066 0.093 0.92627
## invNorm(DR1TM161) -0.150832 0.075659 -1.994 0.04625 *
## bs(BMXBMI, df = 7)1:RIAGENDRMale 3.324562 8.146878 0.408 0.68323
## bs(BMXBMI, df = 7)2:RIAGENDRMale -0.885211 5.352731 -0.165 0.86866
## bs(BMXBMI, df = 7)3:RIAGENDRMale 1.635948 5.979259 0.274 0.78440
## bs(BMXBMI, df = 7)4:RIAGENDRMale 1.933776 5.776375 0.335 0.73781
## bs(BMXBMI, df = 7)5:RIAGENDRMale 3.151327 6.272841 0.502 0.61543
## bs(BMXBMI, df = 7)6:RIAGENDRMale 12.062262 7.273007 1.658 0.09728 .
## bs(BMXBMI, df = 7)7:RIAGENDRMale 3.055050 9.734874 0.314 0.75367
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.076 on 4974 degrees of freedom
## Multiple R-squared: 0.8988, Adjusted R-squared: 0.8982
## F-statistic: 1766 on 25 and 4974 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# WTDRD1: dietary day one sample weight, entered here as an ordinary
# covariate appended to the base formula.
paste0(base_form, "WTDRD1") |>
  run_model()
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.4547 -3.2044 0.0127 3.2389 21.5341
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.835e+01 6.783e+00 7.127 1.17e-12 ***
## bs(RIDAGEYR, df = 7)1 -5.295e-01 7.835e-01 -0.676 0.49922
## bs(RIDAGEYR, df = 7)2 6.067e-01 5.747e-01 1.056 0.29117
## bs(RIDAGEYR, df = 7)3 1.111e+00 6.262e-01 1.774 0.07620 .
## bs(RIDAGEYR, df = 7)4 3.806e+00 5.438e-01 7.000 2.90e-12 ***
## bs(RIDAGEYR, df = 7)5 6.357e+00 7.076e-01 8.984 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 8.813e+00 7.713e-01 11.426 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.678e+00 8.561e-01 10.136 < 2e-16 ***
## bs(BMXBMI, df = 7)1 -1.568e+00 4.790e+00 -0.327 0.74348
## bs(BMXBMI, df = 7)2 9.731e+00 3.360e+00 2.896 0.00379 **
## bs(BMXBMI, df = 7)3 1.660e+01 3.913e+00 4.242 2.25e-05 ***
## bs(BMXBMI, df = 7)4 2.145e+01 3.987e+00 5.380 7.79e-08 ***
## bs(BMXBMI, df = 7)5 3.775e+01 5.190e+00 7.274 4.03e-13 ***
## bs(BMXBMI, df = 7)6 4.372e+01 6.493e+00 6.734 1.84e-11 ***
## bs(BMXBMI, df = 7)7 4.678e+01 8.113e+00 5.767 8.58e-09 ***
## RIAGENDRMale -5.748e-01 5.816e+00 -0.099 0.92127
## BMXWT 3.374e-01 4.410e-02 7.652 2.37e-14 ***
## BMXHT 4.018e-03 4.420e-02 0.091 0.92757
## WTDRD1 -2.557e-07 1.552e-06 -0.165 0.86918
## bs(BMXBMI, df = 7)1:RIAGENDRMale 2.980e+00 8.148e+00 0.366 0.71461
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.113e+00 5.354e+00 -0.208 0.83533
## bs(BMXBMI, df = 7)3:RIAGENDRMale 1.358e+00 5.980e+00 0.227 0.82037
## bs(BMXBMI, df = 7)4:RIAGENDRMale 1.697e+00 5.777e+00 0.294 0.76896
## bs(BMXBMI, df = 7)5:RIAGENDRMale 2.801e+00 6.273e+00 0.446 0.65529
## bs(BMXBMI, df = 7)6:RIAGENDRMale 1.199e+01 7.276e+00 1.648 0.09945 .
## bs(BMXBMI, df = 7)7:RIAGENDRMale 2.478e+00 9.734e+00 0.255 0.79909
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.078 on 4974 degrees of freedom
## Multiple R-squared: 0.8987, Adjusted R-squared: 0.8982
## F-statistic: 1765 on 25 and 4974 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# BMXARML - Upper Arm Length (cm), appended to the base formula.
paste0(base_form, "BMXARML") |>
  run_model()
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.4378 -3.2012 0.0298 3.2378 21.5431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.76081 6.76760 7.205 6.67e-13 ***
## bs(RIDAGEYR, df = 7)1 -0.57038 0.78334 -0.728 0.46657
## bs(RIDAGEYR, df = 7)2 0.57984 0.57459 1.009 0.31296
## bs(RIDAGEYR, df = 7)3 1.09208 0.62603 1.744 0.08114 .
## bs(RIDAGEYR, df = 7)4 3.79216 0.54297 6.984 3.24e-12 ***
## bs(RIDAGEYR, df = 7)5 6.41928 0.70722 9.077 < 2e-16 ***
## bs(RIDAGEYR, df = 7)6 8.88023 0.77175 11.507 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.79530 0.85704 10.262 < 2e-16 ***
## bs(BMXBMI, df = 7)1 -1.63395 4.78656 -0.341 0.73284
## bs(BMXBMI, df = 7)2 9.73041 3.35782 2.898 0.00377 **
## bs(BMXBMI, df = 7)3 16.59364 3.91007 4.244 2.24e-05 ***
## bs(BMXBMI, df = 7)4 21.45620 3.98397 5.386 7.55e-08 ***
## bs(BMXBMI, df = 7)5 37.74824 5.18439 7.281 3.83e-13 ***
## bs(BMXBMI, df = 7)6 43.60914 6.48833 6.721 2.01e-11 ***
## bs(BMXBMI, df = 7)7 46.73453 8.10581 5.766 8.63e-09 ***
## RIAGENDRMale -0.53496 5.81346 -0.092 0.92669
## BMXWT 0.34186 0.04411 7.750 1.11e-14 ***
## BMXHT 0.02057 0.04491 0.458 0.64706
## BMXARML -0.09566 0.04866 -1.966 0.04938 *
## bs(BMXBMI, df = 7)1:RIAGENDRMale 3.01879 8.14519 0.371 0.71093
## bs(BMXBMI, df = 7)2:RIAGENDRMale -1.08485 5.35161 -0.203 0.83937
## bs(BMXBMI, df = 7)3:RIAGENDRMale 1.36000 5.97768 0.228 0.82003
## bs(BMXBMI, df = 7)4:RIAGENDRMale 1.67878 5.77521 0.291 0.77130
## bs(BMXBMI, df = 7)5:RIAGENDRMale 2.76089 6.27043 0.440 0.65974
## bs(BMXBMI, df = 7)6:RIAGENDRMale 11.75667 7.27387 1.616 0.10610
## bs(BMXBMI, df = 7)7:RIAGENDRMale 2.30877 9.73089 0.237 0.81246
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.076 on 4974 degrees of freedom
## Multiple R-squared: 0.8987, Adjusted R-squared: 0.8982
## F-statistic: 1766 on 25 and 4974 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# BMXLEG - Upper Leg Length (cm), appended to the base formula.
paste0(base_form, "BMXLEG") |>
  run_model()
##
## Call:
## lm(formula = as.formula(formula_str), data = train_data_set)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.5270 -3.2304 0.0284 3.1615 22.0200
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.20742 6.62127 7.281 3.84e-13 ***
## bs(RIDAGEYR, df = 7)1 -0.82682 0.76666 -1.078 0.280878
## bs(RIDAGEYR, df = 7)2 0.36717 0.56243 0.653 0.513897
## bs(RIDAGEYR, df = 7)3 0.63721 0.61344 1.039 0.298971
## bs(RIDAGEYR, df = 7)4 2.95979 0.53436 5.539 3.20e-08 ***
## bs(RIDAGEYR, df = 7)5 5.45059 0.69425 7.851 5.02e-15 ***
## bs(RIDAGEYR, df = 7)6 7.45079 0.76002 9.803 < 2e-16 ***
## bs(RIDAGEYR, df = 7)7 8.35203 0.83722 9.976 < 2e-16 ***
## bs(BMXBMI, df = 7)1 -3.67781 4.68661 -0.785 0.432638
## bs(BMXBMI, df = 7)2 8.58713 3.28714 2.612 0.009020 **
## bs(BMXBMI, df = 7)3 14.75803 3.82870 3.855 0.000117 ***
## bs(BMXBMI, df = 7)4 19.64720 3.90089 5.037 4.91e-07 ***
## bs(BMXBMI, df = 7)5 35.13904 5.07685 6.921 5.04e-12 ***
## bs(BMXBMI, df = 7)6 39.83430 6.35520 6.268 3.97e-10 ***
## bs(BMXBMI, df = 7)7 44.25840 7.93484 5.578 2.57e-08 ***
## RIAGENDRMale -1.71525 5.69009 -0.301 0.763087
## BMXWT 0.35028 0.04313 8.122 5.75e-16 ***
## BMXHT 0.11264 0.04374 2.575 0.010047 *
## BMXLEG -0.42902 0.02873 -14.932 < 2e-16 ***
## bs(BMXBMI, df = 7)1:RIAGENDRMale 5.37460 7.97322 0.674 0.500291
## bs(BMXBMI, df = 7)2:RIAGENDRMale -0.27623 5.23787 -0.053 0.957944
## bs(BMXBMI, df = 7)3:RIAGENDRMale 2.72980 5.85104 0.467 0.640842
## bs(BMXBMI, df = 7)4:RIAGENDRMale 2.79959 5.65264 0.495 0.620430
## bs(BMXBMI, df = 7)5:RIAGENDRMale 3.92119 6.13725 0.639 0.522907
## bs(BMXBMI, df = 7)6:RIAGENDRMale 14.42007 7.11988 2.025 0.042887 *
## bs(BMXBMI, df = 7)7:RIAGENDRMale -0.63900 9.52549 -0.067 0.946518
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.967 on 4974 degrees of freedom
## Multiple R-squared: 0.903, Adjusted R-squared: 0.9025
## F-statistic: 1853 on 25 and 4974 DF, p-value: < 2.2e-16
## Warning in bs(BMXBMI, degree = 3L, knots = c(`20%` = 23.698, `40%` = 26.5, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases

# grid.arrange(g1, g2,g3, nrow=3)